#!/usr/bin/env python
# coding: utf-8

# In[481]:


import pandas as pd
import numpy as np
import os


# In[482]:


from python_speech_features import mfcc
import numpy as np

def get_feature(signal):
    """Extract 13-dim MFCC features from a 1-D audio signal sampled at 16 kHz.

    Parameters
    ----------
    signal : 1-D array-like
        Raw audio samples.

    Returns
    -------
    numpy array of shape (num_frames, 13); may be empty when extraction fails.
    """
    mfcc_feature = mfcc(signal, 16000.)
    if len(mfcc_feature) == 0:
        # BUG FIX: the original used the Python-2 statement
        # `print >> sys.stderr, ...`, which raises a TypeError under
        # Python 3 (and `sys` was not yet imported at this point anyway).
        import sys
        print("ERROR.. failed to extract mfcc feature:", len(signal),
              file=sys.stderr)
    return mfcc_feature


# In[520]:


def deal_mfcc(path, max_shape):
    """Load every signal file under *path* and return padded MFCC features.

    Each file is a comma-separated table whose second column holds raw audio
    samples.  MFCC features are extracted, NaNs replaced by 0, and every
    feature matrix is zero-padded at the end so all share (max_shape, 13).
    Files producing MORE than max_shape frames are silently skipped
    (matching the original behaviour).

    Parameters
    ----------
    path : str
        Directory containing the signal text files.
    max_shape : int
        Target number of MFCC frames per sample.

    Returns
    -------
    numpy array of shape (num_files_kept, max_shape, 13), dtype float64.
    """
    frames = []
    for name in os.listdir(path):
        txt_path = os.path.join(path, name)
        df = pd.read_table(txt_path, sep=',', encoding='utf-8')
        sig = df.iloc[:, 1].values
        mfcc_df = np.nan_to_num(get_feature(sig))

        n = mfcc_df.shape[0]
        if n > max_shape:
            # Longer recordings were dropped by the original code as well.
            continue
        if n < max_shape:
            pad = np.zeros((max_shape - n, 13), dtype=float)
            mfcc_df = np.concatenate((mfcc_df, pad), axis=0)
        frames.append(mfcc_df.reshape(1, max_shape, 13))

    if not frames:
        return np.zeros((0, max_shape, 13), dtype=float)
    # PERF: one concatenation instead of the original per-file np.append,
    # which re-copied the whole accumulator on every iteration (O(n^2)).
    return np.concatenate(frames, axis=0)


# In[523]:


def create_label(label_len, label_name):
    """Return a 1-D float array of length *label_len* filled with *label_name*.

    Replaces the original zeros-then-mask-assign idiom with np.full, which
    produces the identical result in one step.
    """
    return np.full((label_len,), label_name, dtype=float)


# In[526]:


# Build the MFCC feature tensor and the class-2 labels for speaker
# "linkunling" (each sample padded to 499 frames x 13 coefficients).
path_linkunling = '/home/kyle/work/coll/human_txt/linkunling/'
linkunling_data = deal_mfcc(path_linkunling,499)
linkunling_label = create_label(linkunling_data.shape[0] , 2)
#print(linkunling_label)


# In[ ]:


# NOTE(review): `linkunling_zero` is not defined at this point in the file
# (it is only created further below), so these two lines raise a NameError
# when the script runs top-to-bottom -- presumably a stale notebook cell.
linkunling_label = np.array(np.zeros((linkunling_zero.shape[0],))).astype(float)
linkunling_label[linkunling_label==0.] = 2.


# In[518]:


# Features/labels for speaker "linkunling" (label 2.0).
# CONSISTENCY: the original cell duplicated the bodies of deal_mfcc() and
# create_label() inline; reuse the helpers so the padding logic lives in
# exactly one place.  Behaviour and resulting arrays are unchanged.
path_linkunling = '/home/kyle/work/coll/human_txt/linkunling/'
linkunling_zero = deal_mfcc(path_linkunling, 499)
linkunling_label = create_label(linkunling_zero.shape[0], 2.)


# In[519]:


# Inspect the feature tensor: expected shape (num_samples, 499, 13).
linkunling_zero.shape


# In[485]:


# Label vector length should equal the number of samples above.
linkunling_label.shape


# In[517]:


print(linkunling_zero)


# In[486]:


# Features/labels for speaker "jiefei" (label 1.0).
# BUG FIX: the original loop iterated over `range(len(path_list))`, but
# `path_list` only exists as a local inside deal_mfcc(), so this cell
# crashed with a NameError.  It clearly meant to process
# `path_jiefei_list`; delegating to deal_mfcc() does exactly that.
path_jiefei = '/home/kyle/work/coll/human_txt/jiefei/'
jiefei_zero = deal_mfcc(path_jiefei, 499)
jiefei_label = create_label(jiefei_zero.shape[0], 1.)


# In[487]:


# Inspect the jiefei tensor: expected shape (num_samples, 499, 13).
jiefei_zero.shape


# In[488]:


# Label vector length should equal the number of samples above.
jiefei_label.shape


# In[489]:


# Features/labels for the "unknown" class (label 0.0).
# CONSISTENCY: the original cell duplicated the bodies of deal_mfcc() and
# create_label() inline; reuse the helpers instead.  create_label(n, 0.)
# yields the same all-zero vector the original built by hand.
path_unknown = '/home/kyle/work/coll/human_txt/unknown/'
unknown_zero = deal_mfcc(path_unknown, 499)
unknown_label = create_label(unknown_zero.shape[0], 0.)


# In[490]:


# Inspect the unknown-class tensor: expected shape (num_samples, 499, 13).
unknown_zero.shape


# In[491]:


# Label vector length should equal the number of samples above.
unknown_label.shape


# In[492]:


# Stack the three per-speaker datasets into one feature tensor and one
# label vector.  Order: jiefei (1.0), unknown (0.0), linkunling (2.0).
data = np.concatenate((jiefei_zero,unknown_zero,linkunling_zero),axis=0)
label = np.concatenate((jiefei_label,unknown_label,linkunling_label),axis=0)


# In[493]:


# Expected: (total_samples, 499, 13).
data.shape


# In[494]:


# Expected: (total_samples,).
label.shape


# In[495]:


# Quick class-balance check: counts per label value.
from collections import Counter
Counter(label)


# In[496]:


#from imblearn.over_sampling import SMOTE, ADASYN
#X_resampled_smote, y_resampled_smote = SMOTE().fit_sample(data, label)


# In[497]:


import keras
import matplotlib.pyplot as plt
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Flatten
from keras.optimizers import RMSprop, Adam, Nadam,Adamax
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io
from keras.layers import Input, Dense, LSTM, RepeatVector, Reshape, Permute,Bidirectional
from keras.models import Model
from keras.layers import Bidirectional, concatenate, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dropout, BatchNormalization
import gc
from keras.utils import plot_model, np_utils
import time
import datetime
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
import warnings
from sklearn.model_selection import train_test_split
from numpy import random, mat
import multiprocessing
import os
import time
import math
from keras.utils import plot_model
warnings.filterwarnings('ignore')


# In[498]:


# Number of output classes (unknown=0, jiefei=1, linkunling=2).
num_sleep_states = 3
#num_sleep_states = 2
# Tag embedded in saved-model filenames (see the Save callback below).
samplecount = 1500


def model_manydim():
    """Build and compile the 1-D CNN classifier for (499, 13) MFCC input.

    Architecture: two Conv1D + MaxPooling1D stages with dropout, a global
    max-pool, one 64-unit dense hidden layer, then a softmax over
    `num_sleep_states` classes.  Compiled with Adam and categorical
    cross-entropy.

    Returns
    -------
    A compiled keras Model.
    """
    print('Build model...')

    X1 = Input(shape=(499, 13))

    # DEAD-CODE REMOVAL: the original also constructed a second, five-stage
    # Conv1D Sequential ("shared_Conv1D") that was never connected to the
    # graph; it has been dropped -- the compiled model is unchanged.
    feature_extractor = Sequential()
    feature_extractor.add(Conv1D(64, 16, activation='relu'))
    feature_extractor.add(MaxPooling1D(4))
    feature_extractor.add(Dropout(0.2))
    feature_extractor.add(Conv1D(64, 8, activation='relu'))
    feature_extractor.add(MaxPooling1D(2))
    feature_extractor.add(Dropout(0.2))
    feature_extractor.add(GlobalMaxPooling1D())

    tmpx = feature_extractor(X1)

    # RepeatVector(1) followed by Flatten is an identity round-trip kept
    # from the original graph so layer ordering/shapes stay compatible
    # with previously saved models.
    tmpx = RepeatVector(1)(tmpx)
    tmpx = Flatten()(tmpx)
    tmpx = Dense(64, activation='relu')(tmpx)
    tmpx = Dropout(0.5)(tmpx)
    Y = Dense(num_sleep_states, activation='softmax')(tmpx)

    model = Model(inputs=X1, outputs=Y)

    optimizer1 = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer1,
                  metrics=['accuracy'])

    print('print model...')
    model.summary()
    # plot_model(model,to_file='model_Conv1D_final.png')
    return model


# In[499]:


def del_file(path):
    """Recursively delete every regular file below *path*.

    Directories themselves are left in place (merely emptied); only
    files are removed.
    """
    for entry in os.listdir(path):
        child = os.path.join(path, entry)
        if os.path.isdir(child):
            del_file(child)
        else:
            os.remove(child)
def shuffleindex(trainTexs):
    """Return *trainTexs* with its rows permuted in random order.

    Fancy indexing produces a new array; the input is not modified.
    """
    order = list(range(trainTexs.shape[0]))
    random.shuffle(order)
    return trainTexs[order]
def getpathfiles(mypath, mylist):
    """Collect every *.csv file under *mypath* (recursively), shuffled.

    Each match is appended to *mylist*; the collection is then returned
    as a randomly permuted numpy array of path strings.

    IDIOM FIX: the original bound the file extension to the name `type`,
    shadowing the builtin; renamed to `ext`.
    """
    for root, dirs, files in os.walk(mypath):
        for file in files:
            _, ext = os.path.splitext(file)
            if ext == ".csv":
                mylist.append(os.path.join(root, file))
    mylist = np.array(mylist)
    order = list(range(mylist.shape[0]))
    random.shuffle(order)
    return mylist[order]

class Save(keras.callbacks.Callback):
    """Keras callback that keeps only the single best checkpoint on disk.

    Relies on the module-level globals `mysavemodelpath`, `model` and
    `samplecount`.  On construction it scans the save directory for an
    existing checkpoint and resumes the best-accuracy threshold from its
    filename; after each epoch it deletes the old checkpoint and saves the
    model whenever validation accuracy improves.
    """
    def __init__(self):
        # Best validation accuracy observed so far.
        self.max_acc = 0.0
        tmodelsavepath = mysavemodelpath
        tmymodelfiles = list()
        tmymodelfiles = getpathfiles(tmodelsavepath, tmymodelfiles)
        if (len(tmymodelfiles) > 0):
            # Parse the accuracy back out of the checkpoint filename,
            # i.e. the substring between "=" and ".h5".
            mname = tmymodelfiles[0]  # kears_model_05_acc=0.7319999933242798.h5
            i1 = mname.find("=")
            i2 = mname.find(".h5")
            scroestr = mname[i1 + 1:i2]
            self.max_acc = float(scroestr)

    def on_epoch_begin(self, epoch, logs=None):
        pass

    def on_epoch_end(self, epoch, logs=None):
        # NOTE(review): "val_acc" is the keras-1/2 metric key; newer keras
        # reports "val_accuracy" -- confirm against the installed version.
        self.val_acc = logs["val_acc"]
        if epoch != 0:
            if self.val_acc > self.max_acc:
                ename = str(epoch)
                # NOTE(review): this prefixes "0" for EVERY epoch > 0, so
                # epoch 10 becomes "010" -- presumably intended as
                # zero-padding for single-digit epochs only.
                if (epoch > 0):
                    ename = "0" + ename
                sname = str(samplecount) + "_"
                # Remove the previous checkpoint so only the best survives.
                del_file(mysavemodelpath)
                model.save(mysavemodelpath + "/kears_model_" + sname + ename + "_acc=" + str(self.val_acc) + ".h5")
                self.max_acc = self.val_acc
                print("kears_model_ Save")


# In[500]:


# Directory where the Save callback stores the best checkpoint.
mysavemodelpath = './model'  #"/data/AI/RNN/test01/codeve_model_10s_reg_addfft"
# Build the CNN and the checkpointing callback used during fit().
model = model_manydim()
save_function = Save()


# In[501]:


# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=0.2, random_state=12)


# In[502]:


# Spot-check the held-out labels.
y_test


# In[503]:


def deal_data(train_date_c1s, train_date_rs):
    """Standardise the feature tensor and one-hot encode the labels.

    Parameters
    ----------
    train_date_c1s : array-like
        Feature tensor; cast to float and z-scored with a single global
        mean and standard deviation computed over every element.
    train_date_rs : array-like
        Integer class labels; one-hot encoded over 3 classes.

    Returns
    -------
    (standardised features, one-hot label matrix)
    """
    features = train_date_c1s.astype(float)
    labels = train_date_rs.astype(float)

    one_hot = to_categorical(labels, num_classes=3)

    # Global standardisation: one mean/std for the whole tensor.
    mean = np.mean(features)
    std = math.sqrt(np.var(features))
    features = (features - mean) / std

    return features, one_hot


# In[504]:


# Preprocess both splits: z-score the features, one-hot the labels.
train_1,train_Y = deal_data(X_train,y_train)
test_1,test_Y = deal_data(X_test,y_test)


# In[505]:


import time
begin = time.time()
# Train for up to 500 epochs; the Save callback keeps the best model
# by validation accuracy on disk.  Wall-clock time is printed after.
history = model.fit(train_1, train_Y,
          batch_size=128,  # 100 is vip
          epochs=500,
          shuffle=True,
          callbacks=[save_function],
          validation_data=(test_1, test_Y))
end = time.time()
print(end-begin)


# In[506]:


# Plot training & validation accuracy and save to ./acc.jpg.
# NOTE(review): the 'acc'/'val_acc' history keys are keras-1/2 names;
# newer keras uses 'accuracy'/'val_accuracy' -- confirm before running.
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.savefig('./acc.jpg')
#plt.show()

# Plot training & validation loss and save to ./loss.jpg.
fig = plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.savefig('./loss.jpg')
#plt.show()


# In[ ]:


# BUG FIX: `load_model` was never imported anywhere in this file, so this
# cell raised a NameError.  Import it locally right before use.
from keras.models import load_model

model = load_model('/home/kyle/work/zhang/01/kears_model_1500_0115_acc=0.8392857115609306_loss=0.3905468768732888.h5')


# In[471]:


from matplotlib import pyplot as plt
import itertools
from sklearn.metrics import confusion_matrix


# In[472]:


def plot_sonfusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Render a confusion matrix and save it as a JPEG in the working dir.

    Parameters
    ----------
    cm : 2-D array
        Confusion matrix (counts) as returned by sklearn.
    classes : iterable
        Tick labels for both class axes.
    normalize : bool
        If True, convert each row to proportions before plotting.
    title, cmap :
        Plot title and colour map.

    BUG FIXES vs. the original:
    * normalization now happens BEFORE imshow, so the image, the per-cell
      text and the colour threshold all use the same matrix (previously
      the image showed raw counts while the text showed proportions);
    * the save path referenced an undefined global `path` (NameError);
      the figure is now written to the current working directory.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j], horizontalalignment='center',
                 color='white' if cm[i, j] > thresh else 'black')
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predict label')
    subfile = os.path.join('.', 'confusion_matrix_test_0_1_5_kyle_test.jpg')
    plt.savefig(subfile, bbox_inches='tight')


# In[ ]:


# NOTE(review): `test_date_c2s` is not defined anywhere in this file --
# this evaluation cell presumably expects `test_1` from the training
# section; confirm before running.
pred_y = model.predict(test_date_c2s)    


# In[ ]:


# Convert softmax outputs / one-hot targets back to integer class ids.
# NOTE(review): `testrs_stat` is also undefined here -- presumably `test_Y`.
pred_label = np.argmax(pred_y, axis=1)
true_label = np.argmax(testrs_stat, axis=1)


# In[ ]:


# Confusion matrix of predictions vs. truth.
# NOTE(review): classes = range(1) yields a single tick label even though
# the model has 3 classes -- likely should be range(3).
confusion_mat = confusion_matrix(true_label, pred_label)
plot_sonfusion_matrix(confusion_mat, classes = range(1))

